package com.ustcinfo.study.scala.r1.zhangshouxin
import org.apache.spark.{SparkConf, SparkContext}
/*
 * @author zhangshouxin
 */
/**
 * Spark driver that counts space-separated tokens in a text file and prints
 * the ten most frequent ones as `(count, token)` pairs, highest count first.
 *
 * The job runs with master `local` and reads either the path given as the
 * first command-line argument or, when no argument is supplied, the default
 * path `file:///D:\city_dns.txt`.
 */
object WorldCount {

  /** Input used when no path is passed on the command line. */
  private val DefaultInputPath = "file:///D:\\city_dns.txt"

  /** How many of the most frequent tokens are printed. */
  private val TopN = 10

  /**
   * Entry point.
   *
   * @param args optional; `args(0)`, when present, is the path of the text
   *             file to read. With no arguments the default path is used.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // Configure a local-mode Spark application and create its context.
    val sparkConf = new SparkConf().setMaster("local").setAppName("WorldCount")
    val sc = new SparkContext(sparkConf)

    try {
      sc.textFile(inputPath)
        // Drop lines that are empty or contain only whitespace.
        .filter(line => line.trim().length > 0)
        // Break every line into tokens on single spaces.
        .flatMap(line => line.split(" "))
        // Leading or repeated spaces make split(" ") emit empty strings;
        // those are not words and must not be counted.
        .filter(token => token.nonEmpty)
        // Pair each token with an initial count of one, then sum per token.
        .map(token => (token, 1))
        .reduceByKey(_ + _)
        // Swap to (count, token) so the count becomes the sort key.
        .map { case (token, count) => (count, token) }
        // Highest counts first; keep the first TopN and print them on the driver.
        .sortByKey(ascending = false)
        .take(TopN)
        .foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
